package cn.iocoder.yudao.module.ai.service.websearch;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import cn.iocoder.yudao.module.ai.service.websearch.vo.AiWebSearchRespVO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Web search service implementation.
 *
 * Supports Bing Web Search (v7), Google search via the Serper API, and a
 * simple HTML crawler that extracts plain text from web pages.
 */
@Service
@Slf4j
public class WebSearchServiceImpl implements WebSearchService {

    /**
     * Serper (Google search proxy) endpoint
     */
    private static final String GOOGLE_URL = "https://google.serper.dev/search";
    /**
     * Bing Web Search v7 endpoint
     */
    private static final String BING_URL = "https://api.bing.microsoft.com/v7.0/search";

    /**
     * Result count used when the caller passes a null or non-positive count
     */
    private static final int DEFAULT_COUNT = 10;

    /**
     * Per-page crawl timeout, in milliseconds
     */
    private static final int CRAWL_TIMEOUT_MS = 10000;

    // NOTE: previously both fields read the same property
    // (yudao.web-search.api-key), which made it impossible to configure Bing
    // and Google with different keys. Each provider now has its own property,
    // falling back to the legacy shared key so existing configurations keep
    // working unchanged.
    @Value("${yudao.web-search.bing.api-key:${yudao.web-search.api-key:}}")
    private String bingApiKey;

    @Value("${yudao.web-search.google.api-key:${yudao.web-search.api-key:}}")
    private String googleApiKey;

    /**
     * Bing search.
     *
     * @param query search keyword; blank input yields an empty list
     * @param count desired number of results; null/non-positive falls back to {@link #DEFAULT_COUNT}
     * @return search result list (empty on error, never null)
     */
    @Override
    public List<AiWebSearchRespVO> bingSearch(String query, Integer count) {
        if (query == null || query.isEmpty()) {
            return CollUtil.newArrayList();
        }
        // guard: a null count would previously be sent as the literal string "null"
        int size = (count == null || count <= 0) ? DEFAULT_COUNT : count;

        // try-with-resources: Hutool HttpResponse is Closeable and must be released
        try (HttpResponse response = HttpRequest.get(BING_URL)
                .header("Ocp-Apim-Subscription-Key", bingApiKey)
                .form("q", query)
                .form("count", String.valueOf(size))
                .form("responseFilter", "Webpages")
                .form("textFormat", "Raw")
                .execute()) {
            // parse response body
            JSONObject json = JSONUtil.parseObj(response.body());

            // results live under webPages.value; absent when the query matched nothing
            List<AiWebSearchRespVO> results = new ArrayList<>();
            if (json.containsKey("webPages") && json.getJSONObject("webPages").containsKey("value")) {
                JSONArray items = json.getJSONObject("webPages").getJSONArray("value");
                for (int i = 0; i < items.size(); i++) {
                    JSONObject item = items.getJSONObject(i);
                    results.add(new AiWebSearchRespVO()
                            .setTitle(item.getStr("name"))
                            .setUrl(item.getStr("url"))
                            .setSnippet(item.getStr("snippet")));
                }
            }
            return results;
        } catch (Exception e) {
            log.error("[bingSearch][查询({}) 发生异常]", query, e);
            return CollUtil.newArrayList();
        }
    }

    /**
     * Google search (via the Serper API).
     *
     * @param query search keyword; blank input yields an empty list
     * @param count desired number of results; null/non-positive falls back to {@link #DEFAULT_COUNT}
     * @return search result list (empty on error, never null)
     */
    @Override
    public List<AiWebSearchRespVO> googleSearch(String query, Integer count) {
        if (query == null || query.isEmpty()) {
            return CollUtil.newArrayList();
        }
        int size = (count == null || count <= 0) ? DEFAULT_COUNT : count;

        try {
            // build JSON request payload; "gl" pins the result geolocation to China
            JSONObject payload = new JSONObject();
            payload.set("q", query);
            payload.set("gl", "cn");
            payload.set("num", size);

            // try-with-resources: release the underlying HTTP connection
            try (HttpResponse response = HttpRequest.post(GOOGLE_URL)
                    .header("X-API-KEY", googleApiKey)
                    .header("Content-Type", "application/json")
                    .body(payload.toString())
                    .execute()) {
                JSONObject json = JSONUtil.parseObj(response.body());
                JSONArray organicResults = json.getJSONArray("organic");
                // "organic" is absent on quota/auth errors; previously this caused
                // an NPE that was only surfaced via the catch-all error log
                if (organicResults == null) {
                    log.warn("[googleSearch][查询({}) 响应缺少 organic 字段]", query);
                    return CollUtil.newArrayList();
                }

                List<AiWebSearchRespVO> results = new ArrayList<>();
                for (int i = 0; i < organicResults.size(); i++) {
                    JSONObject item = organicResults.getJSONObject(i);
                    results.add(new AiWebSearchRespVO()
                            .setTitle(item.getStr("title"))
                            .setUrl(item.getStr("link"))
                            .setSnippet(item.containsKey("snippet") ? item.getStr("snippet") : ""));
                }
                return results;
            }
        } catch (Exception e) {
            log.error("[googleSearch][查询({}) 发生异常]", query, e);
            return CollUtil.newArrayList();
        }
    }

    /**
     * Web crawler: fetches each URL and extracts its visible text.
     *
     * @param urls URLs to crawl
     * @return map of url -> extracted plain text ("" when the fetch or parse failed)
     */
    @Override
    public Map<String, String> webCrawler(List<String> urls) {
        if (CollUtil.isEmpty(urls)) {
            return Map.of();
        }

        Map<String, String> result = new HashMap<>();
        for (String url : urls) {
            try {
                // derive scheme://host[:port] for Origin/Referer headers, since some
                // sites reject requests without them
                String origin = extractOrigin(url);

                // try-with-resources: release the underlying HTTP connection
                try (HttpResponse response = HttpRequest.get(url)
                        .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
                        .header("Origin", origin)
                        .header("Referer", origin)
                        .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
                        .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                        .header("Cache-Control", "max-age=0")
                        .timeout(CRAWL_TIMEOUT_MS)
                        .execute()) {
                    if (response.isOk()) {
                        result.put(url, extractText(response.body()));
                    } else {
                        log.warn("[webCrawler][URL({}) 请求失败，状态码: {}]", url, response.getStatus());
                        result.put(url, "");
                    }
                }
            } catch (Exception e) {
                log.error("[webCrawler][URL({}) 爬取异常]", url, e);
                result.put(url, "");
            }
        }

        return result;
    }

    /**
     * Parses an HTML document and returns its visible body text with
     * whitespace collapsed to single spaces.
     *
     * @param html raw HTML
     * @return cleaned plain text
     */
    private String extractText(String html) {
        org.jsoup.nodes.Document doc = org.jsoup.Jsoup.parse(html);
        // strip elements whose content is never user-visible text
        doc.select("script, style, meta, link").remove();
        return doc.body().text().replaceAll("\\s+", " ").trim();
    }

    /**
     * Extracts the Origin from a URL.
     *
     * Uses {@link java.net.URI} instead of the deprecated
     * {@code new java.net.URL(String)} constructor.
     *
     * @param url full URL
     * @return Origin (scheme://host[:port]), or "" when the URL cannot be parsed
     */
    private String extractOrigin(String url) {
        try {
            java.net.URI parsedUrl = new java.net.URI(url);
            // URI parsing is lenient: relative/opaque URIs yield null scheme/host
            if (parsedUrl.getScheme() == null || parsedUrl.getHost() == null) {
                return "";
            }
            return parsedUrl.getScheme() + "://" + parsedUrl.getHost() +
                   (parsedUrl.getPort() == -1 ? "" : ":" + parsedUrl.getPort());
        } catch (Exception e) {
            log.warn("[extractOrigin][URL({}) 解析异常]", url, e);
            return "";
        }
    }
}
